{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 导入所需库\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc\n",
    "from sklearn.preprocessing import StandardScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 设置绘图风格\n",
    "sns.set(style=\"whitegrid\")\n",
    "plt.rcParams['font.sans-serif'] = ['SimHei']  # 使用黑体字体\n",
    "plt.rcParams['axes.unicode_minus'] = False  # 用于正常显示负号"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step 1: 加载并预处理数据\n",
    "def load_and_preprocess_data(file_path):\n",
    "    \"\"\"\n",
    "    加载并预处理数据集。\n",
    "    \"\"\"\n",
    "    # 加载数据集\n",
    "    data = pd.read_csv(file_path)\n",
    "    \n",
    "    # 处理缺失值\n",
    "    data['Age'] = data['Age'].fillna(data['Age'].median())\n",
    "    data = data.drop(columns=['Cabin'])\n",
    "    data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])\n",
    "    \n",
    "    # 转换类别变量\n",
    "    data['Sex'] = data['Sex'].map({'male': 0, 'female': 1})\n",
    "    data = pd.get_dummies(data, columns=['Embarked'], drop_first=True)\n",
    "    \n",
    "    # 选择相关特征\n",
    "    X = data[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked_Q', 'Embarked_S']]\n",
    "    y = data['Survived']\n",
    "    \n",
    "    return X, y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step 2: 构建并训练逻辑回归模型\n",
    "def build_and_train_model(X_train, y_train):\n",
    "    \"\"\"\n",
    "    构建并训练逻辑回归模型。\n",
    "    \"\"\"\n",
    "    # **********************************************************************************\n",
    "    #填空1（使用sklearn中的逻辑回归，设置模型的最大迭代次数为 1000，并使用 fit 函数对模型进行训练）\n",
    "    model = __________________\n",
    "    model.__________________\n",
    "    # **********************************************************************************\n",
    "\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step 3: 评估模型性能并可视化结果\n",
    "def evaluate_model(model, X_test, y_test):\n",
    "    \"\"\"\n",
    "    评估模型性能并可视化结果。\n",
    "    \"\"\"\n",
    "    # 预测\n",
    "    y_pred = model.predict(X_test)\n",
    "    y_pred_prob = model.predict_proba(X_test)[:, 1]\n",
    "    \n",
    "    # 计算准确率\n",
    "    accuracy = accuracy_score(y_test, y_pred)\n",
    "    print(f'模型准确率: {accuracy:.4f}')\n",
    "    \n",
    "    # **********************************************************************************\n",
    "    # 绘制混淆矩阵\n",
    "    # 填空2（根据文字描补全混淆矩阵的生成与可视化）\n",
    "    cm = ____________#（补充代码：生成混淆矩阵）\n",
    "\n",
    "    # 补充代码：绘制混淆矩阵的热力图，包括横轴、纵轴与图表标题\n",
    "    plt.figure(figsize=(6, 5))\n",
    "    sns.heatmap(__, annot=True, fmt=\"d\", cmap=\"Blues\", xticklabels=____________, yticklabels=____________)\n",
    "    plt.title(____________)\n",
    "    plt.xlabel(____________)\n",
    "    plt.ylabel(____________)\n",
    "    plt.show()\n",
    "    # **********************************************************************************\n",
    "    \n",
    "    # 绘制 ROC 曲线\n",
    "    fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)\n",
    "    roc_auc = auc(fpr, tpr)\n",
    "    plt.figure(figsize=(8, 6))\n",
    "    plt.plot(fpr, tpr, label=f\"AUC = {roc_auc:.4f}\")\n",
    "    plt.plot([0, 1], [0, 1], linestyle='--', color='gray')\n",
    "    plt.title(\"ROC 曲线\")\n",
    "    plt.xlabel(\"假阳性率 (FPR)\")\n",
    "    plt.ylabel(\"真正率 (TPR)\")\n",
    "    plt.legend(loc=\"lower right\")\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step 4: 主函数\n",
    "file_path = \"titanic.csv\"\n",
    "X, y = load_and_preprocess_data(file_path)\n",
    "\n",
    "# 划分训练集与测试集\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
    "\n",
    "# 数据标准化\n",
    "# **********************************************************************************\n",
    "# 填空3（使用sklearn中的StandardScaler创建标准化对象，并对训练集和测试集分别进行标准化）\n",
    "scaler = _______________\n",
    "X_train = _______________\n",
    "X_test = _______________\n",
    "# **********************************************************************************\n",
    "\n",
    "\n",
    "# 构建并训练模型\n",
    "model = build_and_train_model(X_train, y_train)\n",
    "\n",
    "# 评估模型\n",
    "evaluate_model(model, X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
